@incollection{SelimSaleh2011,
  author    = {Selim Saleh, Mahmoud Mohamed},
  title     = {Body Waves},
  booktitle = {Encyclopedia of Solid Earth Geophysics},
  editor    = {Gupta, Harsh K.},
  publisher = {Springer Netherlands},
  address   = {Dordrecht},
  year      = {2011},
  pages     = {29--35},
}

@article{berozaPhaseNet,
  author   = {Zhu, Weiqiang and Beroza, Gregory C.},
  title    = {{PhaseNet}: a deep-neural-network-based seismic arrival-time picking method},
  journal  = {Geophysical Journal International},
  volume   = {216},
  number   = {1},
  pages    = {261--273},
  year     = {2018},
  month    = oct,
  issn     = {0956-540X},
  doi      = {10.1093/gji/ggy423},
  url      = {https://doi.org/10.1093/gji/ggy423},
  eprint   = {https://academic.oup.com/gji/article-pdf/216/1/261/26329430/ggy423.pdf},
  abstract = {As the number of seismic sensors grows, it is becoming increasingly difficult for analysts to pick seismic phases manually and comprehensively, yet such efforts are fundamental to earthquake monitoring. Despite years of improvements in automatic phase picking, it is difficult to match the performance of experienced analysts. A more subtle issue is that different seismic analysts may pick phases differently, which can introduce bias into earthquake locations. We present a deep-neural-network-based arrival-time picking method called ``PhaseNet'' that picks the arrival times of both P and S waves. Deep neural networks have recently made rapid progress in feature learning, and with sufficient training, have achieved super-human performance in many applications. PhaseNet uses three-component seismic waveforms as input and generates probability distributions of P arrivals, S arrivals and noise as output. We engineer PhaseNet such that peaks in the probability distributions provide accurate arrival times for both P and S waves. PhaseNet is trained on the prodigious available data set provided by analyst-labelled P and S arrival times from the Northern California Earthquake Data Center. The data set we use contains more than 700 000 waveform samples extracted from over 30 yr of earthquake recordings. We demonstrate that PhaseNet achieves much higher picking accuracy and recall rate than existing methods when applied to the waveforms of known earthquakes, which has the potential to increase the number of S-wave observations dramatically over what is currently available. This will enable both improved locations and improved shear wave velocity models.},
}

@article{fangshu2019,
  author  = {Yang, Fangshu and Ma, Jianwei},
  title   = {Deep-learning inversion: A next-generation seismic velocity model building method},
  journal = {Geophysics},
  volume  = {84},
  number  = {4},
  pages   = {R583--R599},
  year    = {2019},
  doi     = {10.1190/geo2018-0249.1},
  url     = {https://doi.org/10.1190/geo2018-0249.1},
}

@inproceedings{UnetDL,
  author    = {Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas},
  title     = {{U-Net}: Convolutional networks for biomedical image segmentation},
  booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention},
  pages     = {234--241},
  year      = {2015},
  publisher = {Springer},
}

@inproceedings{skipconnectImportance,
  title     = {The Importance of Skip Connections in Biomedical Image Segmentation},
  author    = {Drozdzal, Michal and Vorontsov, Eugene and Chartrand, Gabriel and Kadoury, Samuel and Pal, Chris},
  booktitle = {Deep Learning and Data Labeling for Medical Applications},
  editor    = {Carneiro, Gustavo and Mateus, Diana and Peter, Lo{\"i}c and Bradley, Andrew and Tavares, Jo{\~a}o Manuel R. S. and Belagiannis, Vasileios and Papa, Jo{\~a}o Paulo and Nascimento, Jacinto C. and Loog, Marco and Lu, Zhi and Cardoso, Jaime S. and Cornebise, Julien},
  pages     = {179--187},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2016},
  isbn      = {978-3-319-46976-8},
}

@inproceedings{Long_2015_CVPR,
  author    = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
  title     = {Fully convolutional networks for semantic segmentation},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  pages     = {3431--3440},
  year      = {2015},
}

@inproceedings{ResNet_He_2016_CVPR,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  pages     = {770--778},
  year      = {2016},
}

@inproceedings{DenseNet_Huang_2017_CVPR,
  author    = {Huang, Gao and Liu, Zhuang and van der Maaten, Laurens and Weinberger, Kilian Q.},
  title     = {Densely Connected Convolutional Networks},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  pages     = {4700--4708},
  month     = jul,
  year      = {2017},
}

@inproceedings{deconv,
  author    = {Noh, Hyeonwoo and Hong, Seunghoon and Han, Bohyung},
  title     = {Learning deconvolution network for semantic segmentation},
  booktitle = {Proceedings of the {IEEE} International Conference on Computer Vision},
  pages     = {1520--1528},
  year      = {2015},
}

@inproceedings{Rosenstein05totransfer,
  author    = {Rosenstein, Michael T and Marx, Zvika and Kaelbling, Leslie Pack and Dietterich, Thomas G},
  title     = {To transfer or not to transfer},
  booktitle = {NIPS 2005 workshop on transfer learning},
  year      = {2005},
  volume    = {898},
  pages     = {1--4},
}

@article{phasenet_and_transfer,
  author    = {Chai, Chengping and Maceira, Monica and Santos-Villalobos, Hector J. and Venkatakrishnan, Singanallur V. and Schoenball, Martin and Zhu, Weiqiang and Beroza, Gregory C. and Thurber, Clifford and {EGS Collab Team}},
  title     = {Using a deep neural network and transfer learning to bridge scales for seismic phase picking},
  journal   = {Geophysical Research Letters},
  volume    = {47},
  number    = {16},
  pages     = {e2020GL088651},
  year      = {2020},
  publisher = {Wiley Online Library},
  doi       = {10.1029/2020GL088651},
}

@software{specfem2d,
  author       = {Komatitsch, D. and Vilotte, J.-P. and Cristini, P. and Labarta, J. and Le Goff, N. and Le Loher, P. and Liu, Q. and Martin, R. and Matzen, R. and Morency, C. and Peter, D. and Tape, C. and Tromp, J. and Xie, Z.},
  title        = {{SPECFEM2D} v7.0.0 [software]},
  year         = {2012},
  organization = {Computational Infrastructure for Geodynamics},
  url          = {https://geodynamics.org/cig/software/specfem2d/},
  optkeywords  = {SPECFEM2D},
}

@article{huber_reg,
  author    = {Huber, Peter J.},
  title     = {Robust Estimation of a Location Parameter},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {35},
  number    = {1},
  pages     = {73--101},
  year      = {1964},
  publisher = {Institute of Mathematical Statistics},
  doi       = {10.1214/aoms/1177703732},
  url       = {https://doi.org/10.1214/aoms/1177703732},
}

@inproceedings{SVR,
  author    = {Drucker, Harris and Burges, Chris J. C. and Kaufman, Linda and Smola, Alex and Vapnik, Vladimir and others},
  title     = {Support vector regression machines},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {9},
  pages     = {155--161},
  year      = {1997},
  publisher = {Morgan Kaufmann Publishers},
}

@article{ray_tracing,
  author  = {Lecomte, Isabelle and Lubrano-Lavadera, Paul and Anell, Ingrid and Buckley, Simon and Schmid, Daniel Walter and Heeremans, Michael},
  title   = {Ray-based seismic modeling of geologic models: Understanding and analyzing seismic images efficiently},
  journal = {Interpretation},
  volume  = {3},
  pages   = {SAC71--SAC89},
  year    = {2015},
  month   = nov,
  doi     = {10.1190/INT-2015-0061.1},
}

@article{podvin,
  author   = {Podvin, Pascal and Lecomte, Isabelle},
  title    = {Finite difference computation of traveltimes in very contrasted velocity models: a massively parallel approach and its associated tools},
  journal  = {Geophysical Journal International},
  volume   = {105},
  number   = {1},
  pages    = {271--284},
  year     = {1991},
  month    = apr,
  issn     = {0956-540X},
  doi      = {10.1111/j.1365-246X.1991.tb03461.x},
  url      = {https://doi.org/10.1111/j.1365-246X.1991.tb03461.x},
  eprint   = {https://academic.oup.com/gji/article-pdf/105/1/271/1953500/105-1-271.pdf},
  abstract = {We present a new massively parallel method for computation of first arrival times in arbitrary velocity models. An implementation on conventional sequential computers is also proposed. This method relies on a systematic application of Huygens' principle in the finite difference approximation. Such an approach explicitly takes into account the existence of different propagation modes (transmitted and diffracted body waves, head waves). Local discontinuities of the time gradient in the first arrival time field (e.g., caustics) are built as intersections of locally independent wavefronts. As a consequence, the proposed method provides accurate first traveltimes in the presence of extremely severe, arbitrarily shaped velocity contrasts. Associated with a simple procedure which accurately traces rays in the obtained time field, this method provides a very fast tool for a large spectrum of seismic and seismological problems. We show moreover that this method may also be used to obtain several arrivals at a given receiver, when the model contains reflectors. This possibility significantly extends the domain of potential geophysical applications.},
}

@article{huang_tomo,
  author    = {Huang, Guangnan and Luo, Songting and Tryggvason, Ari and Li, Hongxing and Nobes, David C.},
  title     = {First-arrival tomography with fast sweeping method solving the factored eikonal equation},
  journal   = {Exploration Geophysics},
  volume    = {50},
  number    = {2},
  pages     = {144--158},
  year      = {2019},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/08123985.2019.1577110},
  url       = {https://doi.org/10.1080/08123985.2019.1577110},
}

@article{sergey_eikonal_equation,
  author   = {Fomel, Sergey and Luo, Songting and Zhao, Hongkai},
  title    = {Fast sweeping method for the factored eikonal equation},
  journal  = {Journal of Computational Physics},
  volume   = {228},
  number   = {17},
  pages    = {6440--6455},
  year     = {2009},
  issn     = {0021-9991},
  doi      = {10.1016/j.jcp.2009.05.029},
  url      = {https://www.sciencedirect.com/science/article/pii/S0021999109002873},
  keywords = {Fast sweeping method, Eikonal equation, Factored eikonal equation, Source singularity},
  abstract = {We develop a fast sweeping method for the factored eikonal equation. By decomposing the solution of a general eikonal equation as the product of two factors: the first factor is the solution to a simple eikonal equation (such as distance) or a previously computed solution to an approximate eikonal equation. The second factor is a necessary modification/correction. Appropriate discretization and a fast sweeping strategy are designed for the equation of the correction part. The key idea is to enforce the causality of the original eikonal equation during the Gauss--Seidel iterations. Using extensive numerical examples we demonstrate that (1) the convergence behavior of the fast sweeping method for the factored eikonal equation is the same as for the original eikonal equation, i.e., the number of iterations for the Gauss--Seidel iterations is independent of the mesh size, (2) the numerical solution from the factored eikonal equation is more accurate than the numerical solution directly computed from the original eikonal equation, especially for point sources.},
}

@article{qian,
  author  = {Qian, Jianliang and Zhang, Yong-Tao and Zhao, Hong-Kai},
  title   = {Fast Sweeping Methods for Eikonal Equations on Triangular Meshes},
  journal = {SIAM Journal on Numerical Analysis},
  volume  = {45},
  pages   = {83--107},
  year    = {2007},
  month   = jan,
  doi     = {10.1137/050627083},
}

@article{billette,
  author   = {Billette, Fr{\'e}d{\'e}ric and Lambar{\'e}, Gilles},
  title    = {Velocity macro-model estimation from seismic reflection data by stereotomography},
  journal  = {Geophysical Journal International},
  volume   = {135},
  number   = {2},
  pages    = {671--690},
  year     = {1998},
  month    = nov,
  issn     = {0956-540X},
  doi      = {10.1046/j.1365-246X.1998.00632.x},
  url      = {https://doi.org/10.1046/j.1365-246X.1998.00632.x},
  eprint   = {https://academic.oup.com/gji/article-pdf/135/2/671/2344430/135-2-671.pdf},
  abstract = {We introduce a new tomographic method for estimating velocity macro-models from seismic reflection data. In addition to traveltimes picked on locally coherent reflected events, the method requires that the associated local slopes of the events be picked simultaneously in the common-shot and common-receiver trace gathers. The data then consist of a discrete collection of traveltimes, positions and slopes for selected reflected events. Unlike traveltime tomography, picked events are only required to be locally coherent. It is not necessary to follow continuous arrivals all over the trace gathers. Indeed, the method does not require the introduction of interfaces in the model description. Several approaches of tomography using the slope have already been proposed. We present a unified formulation for slope tomography methods, in which the model is described by the velocity field and a set of ray-segment pairs associated with the reflected/diffracted events. We propose a new robust slope tomography method, which we call `stereotomography'. It consists of fitting all observed data (positions, slopes and traveltimes) to data calculated by ray tracing. There are no theoretical limitations in stereotomography for laterally heterogeneous velocity macro-models. Practically, traveltimes and slopes are picked on local slant stack panels. Ray multipathing can be accounted for since paths are discriminated by their associated slopes. The non-linear inverse problem is iteratively resolved by a local optimization. The Fr{\'e}chet derivatives are estimated by paraxial ray tracing. Validation tests on 1-D and 2-D synthetic data are analysed. In the first 1-D example, we study the sensitivity of the method to model parameters (using a singular-value decomposition). The second 1-D example evaluates picking precision and shows that it is sufficient for constraining the velocity field. The last example is a 2-D application in which data are calculated directly by ray tracing. It shows the performance of the method in the presence of strong lateral velocity variations.},
}

@incollection{TALWANI2003709,
  author    = {Talwani, Manik and Kessinger, Walter},
  title     = {Exploration Geophysics},
  booktitle = {Encyclopedia of Physical Science and Technology},
  editor    = {Meyers, Robert A.},
  edition   = {Third},
  publisher = {Academic Press},
  address   = {New York},
  pages     = {709--726},
  year      = {2003},
  isbn      = {978-0-12-227410-7},
  doi       = {10.1016/B0-12-227410-5/00238-6},
  url       = {https://www.sciencedirect.com/science/article/pii/B0122274105002386},
}

@article{Hole_inversion,
  author   = {Hole, J. A.},
  title    = {Nonlinear high-resolution three-dimensional seismic travel time tomography},
  journal  = {Journal of Geophysical Research: Solid Earth},
  volume   = {97},
  number   = {B5},
  pages    = {6553--6562},
  year     = {1992},
  doi      = {10.1029/92JB00235},
  url      = {https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/92JB00235},
  eprint   = {https://agupubs.onlinelibrary.wiley.com/doi/pdf/10.1029/92JB00235},
  abstract = {A tomographic inversion procedure is described and applied to a synthetic three-dimensional (3-D) seismic refraction data set, demonstrating that tomography is capable of determining a densely sampled velocity model with large velocity contrasts. Forward and inverse modeling procedures are chosen to minimize the computational costs of the inversion. Parameterizing the linearized inversion using functions defined along the ray paths, simple backprojection with zero pixel size is shown to exactly solve the linear problem, producing the smallest model for the slowness perturbation. For small grid cells, simple backprojection closely approximates the exact solution and is a sufficient solution for an iterative nonlinear inversion. This eliminates the need to store or solve a large system of linear equations. Accurate first arrival travel times are rapidly computed using a finite difference algorithm. Forward modeling between each simple backprojection allows the procedure to correctly account for the locations of the rays. This becomes more important as the spatial resolution of the model is improved. The computational efficiency of the entire nonlinear procedure allows the model to be densely sampled, providing a spatially well-resolved 3-D tomographic image. The synthetic refraction survey is designed to be similar to a published 3-D survey over the East Pacific Rise. Tests based on this example and others show that 3-D tomography is capable of inverting a large travel time data set for detailed earth structure with large lateral velocity variations and is stable in the presence of noisy data.},
}

@article{Hobro_inversion,
  author   = {Hobro, James W. D. and Singh, Satish C. and Minshull, Timothy A.},
  title    = {Three-dimensional tomographic inversion of combined reflection and refraction seismic traveltime data},
  journal  = {Geophysical Journal International},
  volume   = {152},
  number   = {1},
  pages    = {79--93},
  year     = {2003},
  month    = jan,
  issn     = {0956-540X},
  doi      = {10.1046/j.1365-246X.2003.01822.x},
  url      = {https://doi.org/10.1046/j.1365-246X.2003.01822.x},
  eprint   = {https://academic.oup.com/gji/article-pdf/152/1/79/2077032/152-1-79.pdf},
  abstract = {A tomographic inversion method is presented for the determination of 3-D velocity and interface structure from a wide range of body-wave seismic traveltime data types. It is applicable to refraction, wide-angle reflection, normal-incidence and multichannel seismic data, and is best suited to a combination of these that provides good independent constraints on seismic velocities and interface depths. The inversion process seeks a layer--interface minimum-structure model that is able to explain the given data satisfactorily by inverting to minimize data misfit and model roughness norms simultaneously. This regularized inversion, and the use of smooth functions to describe velocities and depths, allows the highly non-linear tomographic problem to be approximated as a series of linear steps. The inversion process begins by optimizing the fit to the data of a highly-smoothed initial model. In each subsequent step, structure is allowed to develop in the model with successively greater detail evolving until a satisfactory fit to the data is obtained. Parameter uncertainties for the final model are then estimated using an a posteriori covariance matrix analysis. Smooth layer--interface models are parametrized using regular grids of velocity and depth nodes from which spline-interpolated interface surfaces and velocity fields are defined. Forward modelling is achieved using ray perturbation theory and a two-point ray tracing method that is optimized for a large number of closely-spaced shot or receiver points. The method may be used to generate 1- and 2-D models (from, for example vertical seismic profile data or 2-D surveys) in which the 3-D geometry of a survey is correctly accounted for. The ability of the method to resolve typical target structures is tested in a synthetic salt dome inversion. From a set of noisy traveltime data, the model converges quickly to a well-resolved final model from different starting models. The application of this method to real data is demonstrated with a combined 3-D inversion of refraction and reflection data which provide P-wave velocity constraints on the methane hydrate stability zone in the Cascadia Margin offshore Vancouver Island.},
}

@article{Bording_inversion,
  author   = {Bording, R. Phillip and Gersztenkorn, Adam and Lines, Larry R. and Scales, John A. and Treitel, Sven},
  title    = {Applications of seismic travel-time tomography},
  journal  = {Geophysical Journal International},
  volume   = {90},
  number   = {2},
  pages    = {285--303},
  year     = {1987},
  month    = aug,
  issn     = {0956-540X},
  doi      = {10.1111/j.1365-246X.1987.tb00728.x},
  url      = {https://doi.org/10.1111/j.1365-246X.1987.tb00728.x},
  eprint   = {https://academic.oup.com/gji/article-pdf/90/2/285/2563963/90-2-285.pdf},
  abstract = {This paper describes the application of tomography to seismic travel-time inversion. There are various implementations of travel-time tomography. In reflection tomography, sources and receivers are on the surface of the Earth and the principal seismic events are reflections from subsurface velocity discontinuities. In transmission tomography, sources and/or receivers may be buried beneath the surface and the events correspond to direct, or unreflected, arrivals; this is the analogue of medical tomography. There are also cases in which both direct as well as reflected arrivals are important, such as in Vertical Seismic Profiling. The latter is a direct application of the first two, but is not discussed in any detail here. It is also shown how the iterative use of travel-time tomography and depth migration can produce much enhanced subsurface images. Examples of both transmission tomography and reflection tomography combined with depth migration illustrate the methods.},
}

@inproceedings{Xu_FWI,
  author       = {Xu, Sheng and Wang, D. and Chen, F. and Zhang, Yu and Lambar{\'e}, G.},
  title        = {Full waveform inversion for reflected seismic data},
  booktitle    = {74th {EAGE} Conference and Exhibition incorporating {EUROPEC} 2012},
  year         = {2012},
  organization = {European Association of Geoscientists \& Engineers},
}

@incollection{intro_FWI,
  author    = {Virieux, Jean and Asnaashari, Amir and Brossier, Romain and M{\'e}tivier, Ludovic and Ribodetti, Alessandra and Zhou, Wei},
  title     = {An introduction to full waveform inversion},
  booktitle = {Encyclopedia of exploration geophysics},
  publisher = {Society of Exploration Geophysicists},
  year      = {2017},
  pages     = {R1--R40},
}

@article{Dan,
  author   = {Kosloff, Dan and Sherwood, John and Koren, Zvi and Machet, Elana and Falkovitz, Yael},
  title    = {Velocity and interface depth determination by tomography of depth migrated gathers},
  journal  = {Geophysics},
  volume   = {61},
  number   = {5},
  pages    = {1511--1523},
  year     = {1996},
  doi      = {10.1190/1.1444076},
  url      = {https://doi.org/10.1190/1.1444076},
  abstract = {A method for velocity and interface depth determination based on tomography of migrated common reflecting point (CRP) gathers is presented. The method is derived from the tomographic principle that relates traveltime change along a given ray to perturbations in slowness and layer depths. The tomographic principle is used to convert depth errors in migrated CRP gathers to time errors along a CRP ray pair and thus enable use of conventional traveltime tomography. It is also used to affect a very fast prestack migration and set up the tomography matrix. The velocity-depth determination method uses the available offsets of all CRPs and inverts for the parameters of all layers simultaneously. Hand picking of depth errors on CRP gathers is avoided by a method where the tomography matrix operates directly on the migrated gathers. The velocity-depth determination method is demonstrated on a synthetic example and on a field example from the North Sea.},
}

@article{Jones2010TutorialVE,
  author    = {Jones, Ian F.},
  title     = {Tutorial: Velocity estimation via ray-based tomography},
  journal   = {First Break},
  volume    = {28},
  number    = {2},
  year      = {2010},
  publisher = {European Association of Geoscientists \& Engineers},
}

@article{lecun2015deeplearning,
  author      = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title       = {Deep Learning},
  journal     = {Nature},
  volume      = {521},
  number      = {7553},
  pages       = {436--444},
  year        = {2015},
  doi         = {10.1038/nature14539},
  url         = {https://doi.org/10.1038/nature14539},
  keywords    = {anomaly-detection deep-learning machine-learning},
  abstract    = {Deep learning allows computational models that are composed of multiple processing layers to learn representations of data with multiple levels of abstraction. These methods have dramatically improved the state-of-the-art in speech recognition, visual object recognition, object detection and many other domains such as drug discovery and genomics. Deep learning discovers intricate structure in large data sets by using the backpropagation algorithm to indicate how a machine should change its internal parameters that are used to compute the representation in each layer from the representation in the previous layer. Deep convolutional nets have brought about breakthroughs in processing images, video, speech and audio, whereas recurrent nets have shone light on sequential data such as text and speech.},
  description = {Deep Learning first paper.},
  added-at    = {2019-11-21T08:33:04.000+0100},
  timestamp   = {2019-11-21T08:33:04.000+0100},
  bibsource   = {dblp computer science bibliography, https://dblp.org},
  biburl      = {https://www.bibsonomy.org/bibtex/22d2dbe7ffc1f95b46d0fc31b9b2706d6/muehlburger},
  interhash   = {6e8511bc64ba3e808ebf330db96a4ea5},
  intrahash   = {2d2dbe7ffc1f95b46d0fc31b9b2706d6},
}

@article{Zheng_deep_learning_inversion,
  author   = {Zheng, York and Zhang, Qie and Yusifov, Anar and Shi, Yunzhi},
  title    = {Applications of supervised deep learning for seismic interpretation and inversion},
  journal  = {The Leading Edge},
  volume   = {38},
  number   = {7},
  pages    = {526--533},
  year     = {2019},
  doi      = {10.1190/tle38070526.1},
  url      = {https://doi.org/10.1190/tle38070526.1},
  abstract = {Recent advances in machine learning and its applications in various sectors are generating a new wave of experiments and solutions to solve geophysical problems in the oil and gas industry. We present two separate case studies in which supervised deep learning is used as an alternative to conventional techniques. The first case is an example of image classification applied to seismic interpretation. A convolutional neural network (CNN) is trained to pick faults automatically in 3D seismic volumes. Every sample in the input seismic image is classified as either a nonfault or fault with a certain dip and azimuth that are predicted simultaneously. The second case is an example of elastic model building --- casting prestack seismic inversion as a machine learning regression problem. A CNN is trained to make predictions of 1D velocity and density profiles from input seismic records. In both case studies, we demonstrate that CNN models trained from synthetic data can be used to make efficient and effective predictions on field data. While results from the first example show that high-quality fault picks can be predicted from migrated seismic images, we find that it is more challenging in the prestack seismic inversion case where constraining the subsurface geologic variations and careful preconditioning of input seismic data are important for obtaining reasonably reliable results. This observation matches our experience using conventional workflows and methods, which also respond to improved signal to noise after migration and stack, and the inherent subsurface ambiguity makes unique parameter inversion difficult.},
}

@article{Li_deep_learning_inversion,
  author  = {Li, Shucai and Liu, Bin and Ren, Yuxiao and Chen, Yangkang and Yang, Senlin and Wang, Yunhai and Jiang, Peng},
  title   = {Deep-Learning Inversion of Seismic Data},
  journal = {IEEE Transactions on Geoscience and Remote Sensing},
  volume  = {58},
  number  = {3},
  pages   = {2135--2149},
  year    = {2020},
  doi     = {10.1109/TGRS.2019.2953473},
}

@article{Adler_deep,
  author  = {Adler, Amir and Araya-Polo, Mauricio and Poggio, Tomaso},
  title   = {Deep Learning for Seismic Inverse Problems: Toward the Acceleration of Geophysical Analysis Workflows},
  journal = {IEEE Signal Processing Magazine},
  volume  = {38},
  number  = {2},
  pages   = {89--119},
  year    = {2021},
  doi     = {10.1109/MSP.2020.3037429},
}

@article{CAO201547,
  author   = {Cao, Danping and Liao, Wenyuan},
  title    = {A computational method for full waveform inversion of crosswell seismic data using automatic differentiation},
  journal  = {Computer Physics Communications},
  volume   = {188},
  pages    = {47--58},
  year     = {2015},
  issn     = {0010-4655},
  doi      = {10.1016/j.cpc.2014.11.002},
  url      = {https://www.sciencedirect.com/science/article/pii/S0010465514003725},
  keywords = {Full waveform inversion, Adjoint state method, Automatic differentiation, Numerical optimization, Inverse problem, Crosswell seismic data},
  abstract = {Full waveform inversion (FWI) is a model-based data-fitting technique that has been widely used to estimate model parameters in Geophysics. In this work, we propose an efficient computational approach to solve the FWI of crosswell seismic data. The FWI problem is mathematically formulated as a partial differential equation (PDE)-constrained optimization problem, which is numerically solved using a gradient-based optimization method. The efficiency and accuracy of FWI are mainly determined by the three main components: forward modeling, gradient calculation and model update which usually involves the gradient-based optimization algorithm. Given the large number of iterations needed by FWI, an accurate gradient is critical for the success of FWI, as it will not only speed up the convergence but also increase the accuracy of the solution. However computing the gradient still remains a challenging task even after the adjoint PDE has been derived. Automatic differentiation (AD) tools have been proved very effective in a variety of application areas including Geoscience. In this work we investigated the feasibility of integrating TAPENADE, a powerful AD tool into FWI, so that the FWI workflow is simplified to allow us to focus on the forward modeling and the model updating. In this paper we choose the limited-memory Broyden--Fletcher--Goldfarb--Shanno (L-BFGS) method due to its robustness and fast convergence. Numerical experiments have been conducted to demonstrate the effectiveness, efficiency and robustness of the new computational approach for FWI.},
}

@misc{richardson2018seismic,
  author        = {Richardson, Alan},
  title         = {Seismic Full-Waveform Inversion Using Deep Learning Tools and Techniques},
  year          = {2018},
  eprint        = {1801.07232},
  archivePrefix = {arXiv},
  primaryClass  = {physics.geo-ph},
  url           = {https://arxiv.org/abs/1801.07232},
}

@article{Zhu_2021,
  author    = {Zhu, Weiqiang and Xu, Kailai and Darve, Eric and Beroza, Gregory C},
  title     = {A general approach to seismic inversion with automatic differentiation},
  journal   = {Computers \& Geosciences},
  publisher = {Elsevier},
  year      = {2021},
  volume    = {151},
  pages     = {104751},
}

@article{baydin2018automatic,
  author    = {Baydin, Atilim Gunes and Pearlmutter, Barak A. and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark},
  title     = {Automatic differentiation in machine learning: a survey},
  journal   = {Journal of Machine Learning Research},
  volume    = {18},
  number    = {153},
  pages     = {1--43},
  year      = {2018},
  publisher = {Journal of Machine Learning Research},
}

@article{PickNet,
  author   = {Wang, Jian and Xiao, Zhuowei and Liu, Chang and Zhao, Dapeng and Yao, Zhenxing},
  title    = {Deep Learning for Picking Seismic Arrival Times},
  journal  = {Journal of Geophysical Research: Solid Earth},
  volume   = {124},
  number   = {7},
  pages    = {6612--6624},
  year     = {2019},
  doi      = {10.1029/2019JB017536},
  url      = {https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/2019JB017536},
  eprint   = {https://agupubs.onlinelibrary.wiley.com/doi/pdf/10.1029/2019JB017536},
  keywords = {deep learning, seismic tomography, arrival times},
  abstract = {Arrival times of seismic phases contribute substantially to the study of the inner working of the Earth. Despite great advances in seismic data collection, the usage of seismic arrival times is still insufficient because of the overload manual picking tasks for human experts. In this work we employ a deep-learning method (PickNet) to automatically pick much more P and S wave arrival times of local earthquakes with a picking accuracy close to that by human experts, which can be used directly to determine seismic tomography. A large number of high-quality seismic arrival times obtained with the deep-learning model may contribute greatly to improve our understanding of the Earth's interior structure.},
}

% Akazawa (2004): automatic detection of P- and S-phase onset times in strong motion records.
% The conference location is part of booktitle; the organization field is reserved
% for a sponsoring body, so it is not used for the venue.
@inproceedings{AR-picker,
  author    = {Akazawa, Takashi},
  title     = {A technique for automatic detection of onset time of {P}- and {S}-phases in strong motion records},
  booktitle = {Proceedings of the 13th World Conference on Earthquake Engineering, Vancouver, Canada},
  year      = {2004}
}

% Simonyan and Zisserman (2014), arXiv e-print 1409.1556.
% Stored as an e-print (eprint / archivePrefix / primaryClass) instead of a journal
% article; the url field lets styles without e-print support still show the source.
@misc{VGG-16,
  author        = {Simonyan, Karen and Zisserman, Andrew},
  title         = {Very deep convolutional networks for large-scale image recognition},
  year          = {2014},
  eprint        = {1409.1556},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/1409.1556}
}

% Dai et al. (2021): deep learning for extracting dispersion curves.
% Names are in "Last, First" form; pages are the final issue pagination, not the
% online-first numbering.
@article{dl_dispersion_curves,
  author  = {Dai, Tianyu and Xia, Jianghai and Ning, Ling and Xi, Chaoqiang and Liu, Ya and Xing, Huaixue},
  title   = {Deep Learning for Extracting Dispersion Curves},
  journal = {Surveys in Geophysics},
  volume  = {42},
  number  = {1},
  pages   = {69--95},
  year    = {2021},
  month   = jan,
  doi     = {10.1007/s10712-020-09615-3}
}

% Fichtner, Bunge and Igel (2006): the adjoint method in seismology, part I (theory).
% Braces in the title keep the part numeral and "Theory" capitalised under
% sentence-casing styles.
@article{adjoint_seismo,
  author  = {Fichtner, Andreas and Bunge, Hans-Peter and Igel, Heiner},
  title   = {The adjoint method in seismology: {I}. {Theory}},
  journal = {Physics of the Earth and Planetary Interiors},
  volume  = {157},
  pages   = {86--104},
  year    = {2006},
  month   = aug,
  doi     = {10.1016/j.pepi.2006.03.016}
}

% Patera (1984): spectral element method for fluid dynamics (laminar flow in a channel expansion).
% The doi field holds the bare DOI; the url field keeps the publisher landing page.
@article{PATERA1984468,
  author   = {Patera, Anthony T.},
  title    = {A spectral element method for fluid dynamics: Laminar flow in a channel expansion},
  journal  = {Journal of Computational Physics},
  volume   = {54},
  number   = {3},
  pages    = {468--488},
  year     = {1984},
  issn     = {0021-9991},
  doi      = {10.1016/0021-9991(84)90128-1},
  url      = {https://www.sciencedirect.com/science/article/pii/0021999184901281},
  abstract = {A spectral element method that combines the generality of the finite element method with the accuracy of spectral techniques is proposed for the numerical solution of the incompressible Navier-Stokes equations. In the spectral element discretization, the computational domain is broken into a series of elements, and the velocity in each element is represented as a high-order Lagrangian interpolant through Chebyshev collocation points. The hyperbolic piece of the governing equations is then treated with an explicit collocation scheme, while the pressure and viscous contributions are treated implicitly with a projection operator derived from a variational principle. The implementation of the technique is demonstrated on a one-dimensional inflow-outflow advection-diffusion equation, and the method is then applied to laminar two-dimensional (separated) flow in a channel expansion. Comparisons are made with experiment and previous numerical work.}
}

% Maday and Patera (1989): spectral element methods for the incompressible Navier-Stokes equations.
% Chapter in an edited survey volume, hence incollection. The ADS export prefix
% ("IN:") and accession code were removed from booktitle, and the city moved to address.
% adsurl and adsnote are ADS bookkeeping fields that standard styles ignore.
@incollection{1989sasc.proc...71M,
  author    = {Maday, Yvon and Patera, Anthony T.},
  title     = {Spectral element methods for the incompressible {Navier-Stokes} equations},
  booktitle = {State-of-the-Art Surveys on Computational Mechanics},
  editor    = {Noor, Ahmed K. and Oden, J. Tinsley},
  publisher = {American Society of Mechanical Engineers},
  address   = {New York},
  year      = 1989,
  month     = jan,
  pages     = {71--143},
  keywords  = {Computational Fluid Dynamics, Incompressible Flow, Navier-Stokes Equation, Spectral Methods, Elliptic Differential Equations, Numerical Flow Visualization, Stokes Flow, Three Dimensional Flow, Two Dimensional Flow, Unsteady Flow, Fluid Mechanics and Heat Transfer},
  adsurl    = {https://ui.adsabs.harvard.edu/abs/1989sasc.proc...71M},
  adsnote   = {Provided by the SAO/NASA Astrophysics Data System}
}

% Mousavi et al. (2020): Earthquake Transformer, an attentive deep-learning model for
% simultaneous earthquake detection and phase picking.
@article{mousavi2020earthquake,
  author    = {Mousavi, S. Mostafa and Ellsworth, William L. and Zhu, Weiqiang and Chuang, Lindsay Y. and Beroza, Gregory C.},
  title     = {Earthquake transformer: an attentive deep-learning model for simultaneous earthquake detection and phase picking},
  journal   = {Nature Communications},
  volume    = {11},
  number    = {1},
  pages     = {1--12},
  year      = {2020},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/s41467-020-17591-w}
}
